from google.colab import drive
drive.mount('/content/drive')
Path = "/content/drive/My Drive/flowers/data/"
# import needed libraries
import os as os
from PIL import Image
from array import array
import cv2 as cv2
from glob import glob
import time
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import np_utils
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import BatchNormalization
from keras import optimizers
from keras.optimizers import Adam
from keras import regularizers
from keras import layers
seed = 7
np.random.seed(seed)
# for API
from keras.models import Model
from keras.layers import Input, Concatenate, Dense, Dropout, Flatten, Activation
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical
#from keras import backend as K
#K.set_image_dim_ordering( 'tf' )
listing = os.listdir( Path )
num_folders = len(listing)
print ( num_folders)
print (listing)
## Analyze the image dataset
# Read a sample image file from class folder "0" and display it.
import skimage.io as io
path = "/content/drive/My Drive/flowers/data/0/"
files = os.listdir(path)
#print(len(files))
#print(files[0])
#print(files[1])
# Show the first listed file (order is whatever os.listdir returns).
imgfile = files[0]
img = io.imread(path + imgfile)
plt.imshow(img)
plt.show()
# IPython magic: prints the current working directory (notebook-only).
pwd
# Shell escape (Colab/IPython): list the class-0 folder.
# NOTE(review): "MyDrive" here vs "My Drive" above — both resolve in Colab.
!ls -l /content/drive/MyDrive/flowers/data/0
#Check out few file names for the photos -
#for Linux uncomment the following line
!ls drive/MyDrive/flowers/data/0 | head -5
#Load an image using the Keras image utilities (returns a PIL image).
image_file = 'drive/MyDrive/flowers/data/0/image_0001.jpg'
flower_image = tf.keras.preprocessing.image.load_img(image_file)
#Inspect image (a bare expression displays the PIL image in a notebook)
flower_image
flower_image2 = tf.keras.preprocessing.image.load_img('drive/MyDrive/flowers/data/0/image_0003.jpg')
flower_image2
# PIL .size is (width, height)
flower_image.size
# image is of shape 704 * 500 with 3 channels
flower_image2.size
#Convert an image to numpy array of shape (height, width, channels)
flower_array = tf.keras.preprocessing.image.img_to_array(flower_image2)
print('Shape of numpy array', flower_array.shape)
# Walk every class folder, recording for each image its path, its label
# (the folder name) and a copy resized to scaleTo x scaleTo pixels.
trainArray = []            # [image path, label] pairs (was seeded with a bogus empty element)
trainImagesPaths = []      # full paths to every image
trainImagesCategories = [] # label (folder name) per image
trainImg = []              # resized BGR images as returned by cv2
scaleTo = 128
seed = 7
t0 = time.time()
for imgFolder in listing:
    print(imgFolder)
    path = "/content/drive/My Drive/flowers/data/" + imgFolder + '/'
    files = os.listdir(path)
    for imgFile in files:
        imgPath = path + imgFile
        img = cv2.imread(imgPath)
        if img is None:
            # cv2.imread returns None for unreadable/non-image files;
            # skip them instead of crashing inside cv2.resize.
            print("Skipping unreadable file:", imgPath)
            continue
        trainArray.append([imgPath, imgFolder])      # image path, image folder
        trainImagesPaths.append(imgPath)             # paths to images
        trainImagesCategories.append(imgFolder)      # labels
        trainImg.append(cv2.resize(img, (scaleTo, scaleTo)))  # images
t1 = time.time()
print(t1 - t0, " seconds")
trainImagesPaths[0]  # path to the first image (the original indexed 1, the second image)
trainImgNParray = np.asarray(trainImg)            # stacked array of all the images
trainlabel = pd.DataFrame(trainImagesCategories)  # dataframe of the category per image
# Sanity checks: image count and label count must match.
len(trainImgNParray)
len(trainlabel)
# Shape is (num_images, 128, 128, 3).
trainImgNParray.shape
print(trainImagesPaths[0:5])
from sklearn.model_selection import train_test_split

# 80/20 train/test split (fixed random_state for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    trainImgNParray, trainImagesCategories, test_size=0.2, random_state=2)
X_train = np.array(X_train)
print(X_train.shape)
X_test = np.array(X_test)
print(X_test.shape)

# Convert to float32, fix the (N, 128, 128, 3) shape and scale pixels to
# [0, 1]. BUG FIX: the original reassigned from X_train/X_test on every
# line, silently discarding the astype()/reshape() results.
train_X = X_train.astype('float32').reshape(X_train.shape[0], 128, 128, 3) / 255.
test_X = X_test.astype('float32').reshape(X_test.shape[0], 128, 128, 3) / 255.
print('\nTraining data : ', train_X.shape) # TRAIN
print('\nTesting data : ', test_X.shape) # TEST

# Flatten each image to a 1-D feature vector for the classical ML models.
# BUG FIX: use the actual array lengths instead of hard-coded 1100/275,
# which silently dropped samples (the split yields 1101 train images).
x_train = [train_X[i].flatten() for i in range(len(train_X))]
x_test = [test_X[i].flatten() for i in range(len(test_X))]
train_Y = y_train[:len(x_train)]
test_Y = y_test[:len(x_test)]

# Print data lengths
print("\n Training data:", len(x_train))
print("\n Training Target data:", len(train_Y))
print("\n Testing data:", len(x_test))
print("\n Testing Target data:", len(test_Y))
# K-Nearest-Neighbours baseline on the flattened pixel vectors.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

KNN = KNeighborsClassifier(n_neighbors=19)
KNN.fit(x_train, train_Y)

# Accuracy on the data the model was fitted on.
train_predictions = KNN.predict(x_train)
KNN_Accuracy_Train = accuracy_score(train_Y, train_predictions)
print("Train Accuracy : ", KNN_Accuracy_Train)

# Accuracy on the held-out split.
test_predictions = KNN.predict(x_test)
KNN_Accuracy_Test = accuracy_score(test_Y, test_predictions)
print("Test Accuracy : ", KNN_Accuracy_Test)
# RANDOM FOREST baseline on the flattened pixel vectors.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Model.
# BUG FIX: max_features='auto' and min_impurity_split were removed from
# scikit-learn (1.3 and 1.0 respectively) and now raise TypeError/ValueError.
# 'sqrt' is what 'auto' mapped to for classifiers; min_impurity_split=None
# had no effect anyway.
model_RF = RandomForestClassifier(n_estimators=20,
                                  criterion='gini',
                                  max_depth=None,
                                  min_samples_split=2,
                                  min_samples_leaf=1,
                                  min_weight_fraction_leaf=0.0,
                                  max_features='sqrt',
                                  max_leaf_nodes=None,
                                  min_impurity_decrease=0.0,
                                  bootstrap=True,
                                  oob_score=False,
                                  n_jobs=None,
                                  random_state=None,
                                  verbose=0,
                                  warm_start=False,
                                  class_weight=None,
                                  ccp_alpha=0.0,
                                  max_samples=None)
model_RF.fit(x_train, train_Y)

# Accuracy on train (via score) and on the held-out test split.
pred_RF = model_RF.predict(x_test)
RF_Train = model_RF.score(x_train, train_Y)
RF_Test = accuracy_score(test_Y, pred_RF)

# Output
print("Train Accuracy:", RF_Train)
print("Test Accuracy:", RF_Test)
def plot_images(original_image, updated_image):
    """Display an image and its transformed version side by side."""
    plt.figure(figsize=(15, 8))
    panels = [('Original image', original_image),
              ('Transformed image', updated_image)]
    for position, (caption, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(caption)
        plt.imshow(picture)
    plt.show()
## Apply different PIL filters to one sample image and show before/after.
from PIL import Image, ImageFilter

# One (label, filter) entry per demonstration. The original repeated the
# open/filter/plot sequence six times; a loop keeps the demos in sync.
_FILTER_DEMOS = [
    ('Blur', ImageFilter.BLUR),
    ('Contour', ImageFilter.CONTOUR),
    ('Edge detection', ImageFilter.FIND_EDGES),
    ('Emboss', ImageFilter.EMBOSS),
    ('Smooth', ImageFilter.SMOOTH),
    ('Sharpen', ImageFilter.SHARPEN),
]
for _label, _pil_filter in _FILTER_DEMOS:
    image = Image.open(trainImagesPaths[400])
    image2 = image.filter(_pil_filter)
    # Show both original and transformed image
    plot_images(image, image2)
# Define train and test data streams with an ImageDataGenerator.
img_size = 128
img_depth = 3

# Hold back 20% of the images as the validation subset (80% for training).
# Augmentation (rotation/flip/shift) could be enabled here given the small
# number of records per class.
img_generator = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=0.2)

data_root = '/content/drive/My Drive/flowers/data/'

# Training generator (80% of the images).
train_generator = img_generator.flow_from_directory(
    data_root,
    target_size=(img_size, img_size),
    subset='training',
    batch_size=64)

# Validation/test generator (remaining 20%).
test_generator = img_generator.flow_from_directory(
    data_root,
    target_size=(img_size, img_size),
    subset='validation',
    batch_size=64)

# Inspect one batch: X is (batch, 128, 128, 3), y is one-hot class labels.
X, y = next(train_generator)
print('Input features shape', X.shape)
print('Actual labels shape', y.shape)

# One-hot encoded label of a sample record.
y[15]
## Create a fully-connected (dense) neural network model.
num_classes = 17

model = tf.keras.models.Sequential()
# Flatten the (128, 128, 3) input to a 1-D vector; Flatten computes the
# 49152-element size itself instead of hard-coding it in a Reshape.
model.add(tf.keras.layers.Flatten(input_shape=(128, 128, 3)))
# Normalize the flattened features.
model.add(tf.keras.layers.BatchNormalization())
# Hidden layers with ReLU activations, then a softmax over the 17 classes.
# BUG FIX: use tf.keras.layers.Dense throughout — the original mixed
# standalone-keras Dense layers into a tf.keras Sequential, which raises
# on current TensorFlow/Keras versions.
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Compile and inspect the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

model.fit(train_generator,
          epochs=20,
          steps_per_epoch=1101 // 64,     # number of training batches per epoch
          validation_data=test_generator,
          validation_steps=274 // 64)     # BUG FIX: was 274/64 (float division)

## Validation and training accuracy
model.evaluate(test_generator)
model.evaluate(train_generator)
model.save('/content/drive/MyDrive/Colab Notebooks/CNNProject2/flowers_nn.h5')
# Clear any previous model from memory.
tf.keras.backend.clear_session()

# Convolutional network: four Conv+MaxPool stages of increasing depth,
# followed by dense layers, dropout and a 17-way softmax output.
_L = tf.keras.layers
_conv_stack = []
# (filters, kernel size) for each convolutional stage.
for _filters, _kernel in [(32, (3, 3)), (64, (4, 4)), (128, (3, 3)), (128, (2, 2))]:
    _conv_stack.append(_L.Conv2D(_filters, kernel_size=_kernel, activation='relu'))
    _conv_stack.append(_L.MaxPool2D(pool_size=(2, 2)))

model_cnn = tf.keras.models.Sequential(
    [_L.BatchNormalization(input_shape=(img_size, img_size, 3,))]  # normalize inputs
    + _conv_stack
    + [_L.Flatten(),
       _L.Dense(128, activation='relu'),
       _L.Dense(64, activation='relu'),
       _L.Dense(32, activation='relu'),
       _L.Dropout(0.25),                  # dropout before the output layer
       _L.Dense(17, activation='softmax')])

# Specify loss and optimizer, then inspect the architecture.
model_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_cnn.summary()

# Checkpoint callback that would keep the best weights by validation
# accuracy. NOTE(review): as in the original, it is created but never
# passed to fit(), so it has no effect.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/drive/MyDrive/Colab Notebooks/CNNProject2/flowers_nn.h5',  # where to save the model
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1)

model_cnn.fit(train_generator,
              epochs=30,
              steps_per_epoch=1101 // 64,   # number of batches per epoch
              validation_data=test_generator,
              validation_steps=274 // 64)   # number of test images // batch_size

## Validation Accuracy
model_cnn.evaluate(test_generator)
## Training Accuracy
model_cnn.evaluate(train_generator)
# Clear any previous model from memory.
tf.keras.backend.clear_session()

# Transfer learning: MobileNetV2 as a frozen feature extractor with a
# fresh 17-way softmax head on top.
pretrained_model = tf.keras.applications.MobileNetV2(input_shape=[128,128,3], include_top=False)
# Alternative backbones tried (gave similar results):
#pretrained_model = tf.keras.applications.VGG16(weights='imagenet', include_top=False ,input_shape=[*IMAGE_SIZE, 3])
#pretrained_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=[*IMAGE_SIZE, 3])
#pretrained_model = tf.keras.applications.MobileNet(weights='imagenet', include_top=False, input_shape=[*IMAGE_SIZE, 3])
pretrained_model.trainable = False  # freeze the pretrained weights

classifier_head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(17, activation='softmax'),
]
model_tf = tf.keras.Sequential([pretrained_model] + classifier_head)

model_tf.compile(optimizer='adam',
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])
model_tf.summary()

model_tf.fit(train_generator,
             epochs=10,
             steps_per_epoch=1101 // 64,   # number of batches per epoch
             validation_data=test_generator,
             validation_steps=274 // 64)   # number of test images // batch_size

# Accuracy on the training and validation streams.
model_tf.evaluate(train_generator)
model_tf.evaluate(test_generator)
## Simple tkinter GUI to classify a single image with the trained CNN.
import tkinter as tk
from tkinter import ttk
import pandas as pd
import re
import os
import cv2
import numpy as np

## Define window
win = tk.Tk()
win.title('CLASSIFIER GUI - Great Learning')

Name = ttk.Label(win, text="Step 1 : File Name")
Name.grid(row=0, column=0, sticky=tk.W)
Name_var = tk.StringVar()
Name_entrybox = ttk.Entry(win, width=16, textvariable=Name_var)
Name_entrybox.grid(row=0, column=1)

def Import_Data():
    """Capture the file name typed into the entry box."""
    global img_name
    # BUG FIX: the original stored the StringVar object itself;
    # .get() returns the actual text.
    img_name = Name_var.get()

def Predict_Image():
    """Load the named image, preprocess it and classify it with model_cnn."""
    # BUG FIX: the original passed the StringVar to cv2.imread, appended to
    # the function object, and referenced undefined names (dummy, PI,
    # DB_Classify). This version performs the intended pipeline.
    img = cv2.imread(Name_var.get())
    if img is None:
        print("Could not read image:", Name_var.get())
        return None
    img = cv2.resize(img, (128, 128))
    batch = img.reshape(1, 128, 128, 3)       # single-image batch for predict
    prediction = model_cnn.predict(batch)
    print("Predicted class:", int(np.argmax(prediction)))
    return prediction

Import_Data_Button = ttk.Button(win, text="Import Data", command=Import_Data)
Import_Data_Button.grid(row=0, column=2)

Label_Name = ttk.Label(win, text="Image Classifier")
# BUG FIX: the original gridded `Name` a second time instead of the new label.
Label_Name.grid(row=1, column=0, sticky=tk.W)

Predict_Button = ttk.Button(win, text="Predict", command=Predict_Image)
Predict_Button.grid(row=2, column=1, sticky=tk.W)

win.mainloop()
CNN and transfer learning achieved good accuracy on the test data set compared to the plain neural network and the supervised learning models.
Since the number of records is small, the transfer learning and CNN models tended to overfit.
Transfer learning helped reduce the loss and achieve better accuracy even in the initial epochs.
The other transfer learning backbones tried (commented out) gave nearly the same results as the MobileNet model.
Maintaining model accuracy once in production is a key part of supporting and maintaining an AI/ML model.
Unseen data causes model performance to decay, so the model needs to be retrained on newly collected data sets and redeployed.
Metrics such as accuracy score, MAE, MSE, and R-squared need to be monitored, depending on business needs, and the model retuned if they fall to an unacceptable level.
As with DevOps, model development and deployment is a continuous process; new versions of the production model should be produced with new data and further hyperparameter tuning.